# Forelesning 7, onsdag 01.02.2017

# 1. Unbalanced design with two numeric covariates and
#    one factor with 4 levels.
set.seed(3)

# The factor: 20 draws with replacement from four groups. The levels are
# given explicitly so that the integer codes a=1, b=2, c=3, d=4 hold even
# if some group happens not to be drawn (e.g. after changing the seed).
f1 <- factor(
  sample(c("a", "b", "c", "d"), 20, replace = TRUE),
  levels = c("a", "b", "c", "d")
)

# What does this do? Indexing with a factor uses its integer codes, so
# (1:4)[f1] gives each observation the mean 1, 2, 3 or 4 for group
# a, b, c or d respectively.
x1 <- rnorm(20, (1:4)[f1])

# x2 is centred on x1, so the two covariates are strongly correlated.
x2 <- rnorm(20, x1, sd = 0.5)

# Response: intercept 10, a very small x1 effect, a moderate x2 effect and
# a group-specific shift looked up by the factor's integer codes.
y <- rnorm(
  20,
  mean = 10 + 0.01 * x1 + 0.2 * x2 + c(0, 1, 1.5, -0.5)[f1],
  sd = 0.5
)

completedata <- data.frame(y, x1, x2, f1)

# Remove the loose vectors so that later code must use the data frames.
rm(x1, x2, f1)

# First 10 rows are used for fitting, the last 10 for validation.
trainingset <- completedata[1:10, ]
validationset <- completedata[11:20, ]

trainingset

# Full model: both numeric covariates plus the factor.
fullmodel <- lm(y ~ x1 + x2 + f1, data = trainingset)
summary(fullmodel)

# drop1(): tests different reduced models against a given model
# by leaving out (dropping) one explanatory variable at a time.
drop1(fullmodel, test = "F")  # Which variable can be dropped?

# Reduced model: the full model without x1, i.e. y ~ x2 + f1.
reduced <- update(fullmodel, . ~ . - x1)
summary(reduced)
drop1(reduced, test = "F")

# Further reduced model: x2 removed as well, i.e. y ~ f1.
reduced2 <- update(reduced, . ~ . - x2)
summary(reduced2)
drop1(reduced2, test = "F")

# add1(): the opposite direction - tests adding one candidate term at a
# time to the given model. Can any variable be added back?
add1(reduced2, scope = . ~ . + x1 + x2 + f1, test = "F")

# Predicted values for the validation data.
predict(fullmodel, newdata = validationset)

# Sum of squared prediction errors on the validation data,
# first for the full model and then for the factor-only model.
with(
  validationset,
  sum((y - predict(fullmodel, newdata = validationset))^2)
)
with(
  validationset,
  sum((y - predict(reduced2, newdata = validationset))^2)
)

# Observed versus predicted response for the factor-only model.
plot(validationset$y, predict(reduced2, newdata = validationset))

# Observed versus predicted response in a 2 x 2 grid of panels.
# par() returns the previous settings, which are saved here and restored
# afterwards instead of forcing a 1 x 1 layout.
old_par <- par(mfrow = c(2, 2))

# Top row: training data, full model and then factor-only model.
plot(trainingset$y, predict(fullmodel))
abline(0, 1)  # identity line: points on it are predicted exactly
plot(trainingset$y, predict(reduced2))
abline(0, 1)

# Bottom row: validation data, full model and then factor-only model.
plot(validationset$y, predict(fullmodel, newdata = validationset))
abline(0, 1)
plot(validationset$y, predict(reduced2, newdata = validationset))
abline(0, 1)

par(old_par)